#############################################################################
#############################################################################
####Appendix: Non-state Atrocities in Capital Cities - Summary Statistics####
#############################################################################
#############################################################################
library(foreign)
library(doBy)
library(plyr)

# Set working library
setwd("~/Data/Global Analysis/")
# Read in main data
main.data <- read.dta("full.grid.upd.dta")

############################
###Descriptive Statistics###
############################

###Insurgent atrocities
median(main.data$incidentnonstatefull, na.rm=T) 
mean(main.data$incidentnonstatefull, na.rm=T)  
sd(main.data$incidentnonstatefull, na.rm=T)
min(main.data$incidentnonstatefull, na.rm=T)  
max(main.data$incidentnonstatefull, na.rm=T)
###Capital
median(main.data$lag_Capital, na.rm=T) 
mean(main.data$lag_Capital, na.rm=T)  
sd(main.data$lag_Capital, na.rm=T)
min(main.data$lag_Capital, na.rm=T)  
max(main.data$lag_Capital, na.rm=T) 
###Civil conflict
median(main.data$lagcivconflagtemp, na.rm=T) 
mean(main.data$lagcivconflagtemp, na.rm=T)  
sd(main.data$lagcivconflagtemp, na.rm=T)
min(main.data$lagcivconflagtemp, na.rm=T)  
max(main.data$lagcivconflagtemp, na.rm=T) 
###Atrocities (lag)
median(main.data$lagincidentsumfull, na.rm=T) 
mean(main.data$lagincidentsumfull, na.rm=T)  
sd(main.data$lagincidentsumfull, na.rm=T)
min(main.data$lagincidentsumfull, na.rm=T)  
max(main.data$lagincidentsumfull, na.rm=T) 
###Urbanization
median(main.data$lagurban, na.rm=T) 
mean(main.data$lagurban, na.rm=T)  
sd(main.data$lagurban, na.rm=T)
min(main.data$lagurban, na.rm=T)  
max(main.data$lagurban, na.rm=T) 
###GCP
median(main.data$loglagppp, na.rm=T) 
mean(main.data$loglagppp, na.rm=T)  
sd(main.data$loglagppp, na.rm=T)
min(main.data$loglagppp, na.rm=T)  
max(main.data$loglagppp, na.rm=T) 
###Border distance
median(main.data$loglagbdist1, na.rm=T) 
mean(main.data$loglagbdist1, na.rm=T)  
sd(main.data$loglagbdist1, na.rm=T)
min(main.data$loglagbdist1, na.rm=T)  
max(main.data$loglagbdist1, na.rm=T) 
###Population
median(main.data$loglagpop, na.rm=T) 
mean(main.data$loglagpop, na.rm=T)  
sd(main.data$loglagpop, na.rm=T)
min(main.data$loglagpop, na.rm=T)  
max(main.data$loglagpop, na.rm=T) 
###Population
median(main.data$loglagttime, na.rm=T) 
mean(main.data$loglagttime, na.rm=T)  
sd(main.data$loglagttime, na.rm=T)
min(main.data$loglagttime, na.rm=T)  
max(main.data$loglagttime, na.rm=T) 
###Cell area
median(main.data$loglagcellarea, na.rm=T) 
mean(main.data$loglagcellarea, na.rm=T)  
sd(main.data$loglagcellarea, na.rm=T)
min(main.data$loglagcellarea, na.rm=T)  
max(main.data$loglagcellarea, na.rm=T) 
###Polity2
median(main.data$lagp_polity2, na.rm=T) 
mean(main.data$lagp_polity2, na.rm=T)  
sd(main.data$lagp_polity2, na.rm=T)
min(main.data$lagp_polity2, na.rm=T)  
max(main.data$lagp_polity2, na.rm=T)
###Oil
median(main.data$loglagross_oil_prod, na.rm=T) 
mean(main.data$loglagross_oil_prod, na.rm=T)  
sd(main.data$loglagross_oil_prod, na.rm=T)
min(main.data$loglagross_oil_prod, na.rm=T)  
max(main.data$loglagross_oil_prod, na.rm=T) 
###Large city
#subset urban data
u.main.data <- main.data[ which(main.data$lagurban > 0),]
#Create large city indicator
u.main.data$lag.large.urb <- ifelse(u.main.data$lagurban>=quantile(u.main.data$lagurban, 0.95),1,0)
#Summary stats
median(u.main.data$lag.large.urb, na.rm=T) 
mean(u.main.data$lag.large.urb, na.rm=T)  
sd(u.main.data$lag.large.urb, na.rm=T)
min(u.main.data$lag.large.urb, na.rm=T)  
max(u.main.data$lag.large.urb, na.rm=T) 
###Military expenditure
median(main.data$loglagmilex, na.rm=T) 
mean(main.data$loglagmilex, na.rm=T)  
sd(main.data$loglagmilex, na.rm=T)
min(main.data$loglagmilex, na.rm=T)  
max(main.data$loglagmilex, na.rm=T) 
###Mountainous area
median(main.data$lagmnt, na.rm=T) 
mean(main.data$lagmnt, na.rm=T)  
sd(main.data$lagmnt, na.rm=T)
min(main.data$lagmnt, na.rm=T)  
max(main.data$lagmnt, na.rm=T) 
###Nighttime light
median(main.data$laglnNL_sum, na.rm=T) 
mean(main.data$laglnNL_sum, na.rm=T)  
sd(main.data$laglnNL_sum, na.rm=T)
min(main.data$laglnNL_sum, na.rm=T)  
max(main.data$laglnNL_sum, na.rm=T) 
###UCDP one-sided violence incidents
median(main.data$ucdp_inc, na.rm=T) 
mean(main.data$ucdp_inc, na.rm=T)  
sd(main.data$ucdp_inc, na.rm=T)
min(main.data$ucdp_inc, na.rm=T)  
max(main.data$ucdp_inc, na.rm=T) 
###UCDP one-sided violence incidents (lag)
median(main.data$lagucdp_inc, na.rm=T) 
mean(main.data$lagucdp_inc, na.rm=T)  
sd(main.data$lagucdp_inc, na.rm=T)
min(main.data$lagucdp_inc, na.rm=T)  
max(main.data$lagucdp_inc, na.rm=T) 
###Freedom of press
median(main.data$lagfred_press, na.rm=T) 
mean(main.data$lagfred_press, na.rm=T)  
sd(main.data$lagfred_press, na.rm=T)
min(main.data$lagfred_press, na.rm=T)  
max(main.data$lagfred_press, na.rm=T) 


#################################################
###Create a table of affected country capitals###
#################################################
library(countrycode)
o <- subset(main.data, lag_Capital>0)
cap.tab <- summaryBy(incidentnonstatefull ~ ccode, FUN=c("sum"), data=o, na.rm=TRUE)
names(cap.tab) <- c("ccode", "capat")
cap.tab$cname <- countrycode(cap.tab$ccode, origin="cown", "country.name")
cap.tab2 <- cap.tab[ which(cap.tab$capat >0),]
cap.tab2

###Calculate atrocities in the capital as a share of each country's total atrocities
o1 <- summaryBy(incidentnonstatefull ~ ccode, FUN=c("sum"), data=main.data, na.rm=TRUE)
names(o1) <- c("ccode", "totat")
cap.tab3 <- join(cap.tab, o1)
#Remove all countries that did not experience atrocities
cap.tab3 <- subset(cap.tab3, totat>0)
#Create a share indicator
cap.tab3$at_per <- (cap.tab3$capat/cap.tab3$totat)*100
cap.tab3

#################################################
###Frequency histograms of nonstate atrocities###
#################################################

###Histogram of the frequency of atrocities, 1996-2009 (inflated)
pdf("WWAtFreq2.pdf")
par(mfrow=c(1,1))
plot(table(main.data$incidentnonstatefull), lwd=3, ylab = "Frequency", 
     xlab = "Number of Atrocities per Grid Cell-Year",
     main= "Insurgent Atrocities Worldwide (1996-2009)")
dev.off()

###Histogram of the frequency of atrocities, 1996-2009 (zero observations removed)
pdf("WWAtNoZFreq2.pdf")
plot(table(main.data$incidentnonstatefull[main.data$incidentnonstatefull>0]), lwd=3, ylab = "Frequency", 
     xlab = "Number of Atrocities per Grid Cell-Year",
     main= "Non-Zero Insurgent Atrocities Worldwide (1996-2009)")
dev.off()


